CoRC logo — the Copasi R Connector

CoRC, the Copasi R Connector, links the Complex Pathway Simulator COPASI (copasi.org) and the (statistical) programming environment R (r-project.org). It provides easy access to the powerful biochemical model editing, simulation and analysis backend of Copasi from the convenient R command line interface. This allows the user to develop elaborate scripts and workflows for analyses that would require a great deal of tedious manual work otherwise. These scripts can then be run interactively from the R command line interface or send to cluster or cloud facilities for more demanding calculations.

CoRC features:

  • high-level API for Copasi in the R language

You can get more information or download CoRC from https://github.com/jpahle/CoRC/.

Installation

You can install CoRC from github with:

# install.packages("devtools")
devtools::install_github("jpahle/CoRC")
CoRC::getCopasi()

CoRC comes with the Artistic License 2.0. By using CoRC you agree to this license.

Examples, case-studies and workflows

library(tidyverse)
library(parallel)
library(CoRC)

# helper to run tasks in parallel on all cores
mapInParallel <- function(data, fun, ..., .prep = {}) {
  cl <- makeCluster(detectCores())
  clusterCall(cl = cl, fun = eval, .prep, env = .GlobalEnv)
  ret <- clusterApplyLB(cl = cl, x = parallel:::splitList(data, length(data)), fun = lapply, as_mapper(fun), ...)
  stopCluster(cl)
  flatten(ret)
}

3D trajectory plot of calcium model

loadSBML("http://www.ebi.ac.uk/biomodels-main/download?mid=BIOMD0000000329")
# Run 24 sec (2 Periods)
setTimeCourseSettings(24, intervals = 10000)

timeseries <- list(
  deterministic = runTimeCourse(),
  stochastic = runTimeCourse(method = "directMethod")
)

# simplify species names so they are valid R syntax
timeseries <- map(timeseries, set_tidy_names, TRUE)

# Create the same plot for both timeseries
plots <- map(
  timeseries,
  plotly::plot_ly,
  type = "scatter3d",
  mode = "lines",
  x = ~ G.alpha,
  y = ~ activePLC,
  z = ~ Calcium,
  color = ~ Time
)

unloadModel()

plots$deterministic
plots$stochastic

Statistics of reapeated stochastic simulations

# Run 1000 stochastic time series possibly in parallel
loadModel("https://raw.githubusercontent.com/copasi/condor-copasi/master/examples/stochastic_test.cps")
# timeseries <- 1:1000 %>% map(~ runTimeCourse())
timeseries <-
  1:1000 %>%
  # Read parallel evaluation as: first do .prep and then run the formula ~
  mapInParallel(
    .prep = quote({
      library(CoRC)
      loadModel("https://raw.githubusercontent.com/copasi/condor-copasi/master/examples/stochastic_test.cps")
    }),
    ~ runTimeCourse()
  )

# Combine all results and reshape the data
plotdata <-
  timeseries %>%
  bind_rows() %>%
  group_by(Time) %>%
  summarise_all(
    funs(mean, sd)
  ) %>%
  # gather all values so the column "name" identifies "a_mean", "b_sd" etc.
  gather(name, value, -Time) %>%
  # split up information on species (a,b,c) and type of value (mean, sd)
  separate(name, c("species", "type"), "_") %>%
  spread(type, value)

plot <-
  ggplot(data = plotdata, aes(x = Time, y = mean, group = species, tt_sd = sd)) +
  geom_ribbon(aes(ymin = mean - sd, ymax = mean + sd, fill = species), alpha = 1 / 4) +
  geom_line(aes(color = species)) +
  guides(fill = "none") +
  labs(
    x = paste0("Time (", getTimeUnit(), ")"),
    y = paste0("Concentration (", getQuantityUnit(), ")"),
    color = "Species"
  )

unloadModel()

plotly::ggplotly(plot, tooltip = c("group", "x", "y", "tt_sd"))

Parameter scan

A 2D scan over the cartesian product of two species concentration vectors.

loadSBML("https://www.ebi.ac.uk/biomodels-main/download?mid=BIOMD0000000068")
setSteadyStateSettings(calculateJacobian = FALSE, performStabilityAnalysis = FALSE)

cysteine_ref <- species("Cysteine")
adomed_ref <- species("adenosyl")

# Cartesian product of the input values
scan <- cross_df(
  list(
    cysteine = 0.3 * 10 ^ seq(0, 3, length.out = 6),
    adomed = seq(0, 100, length.out = 51)
  )
)

scan <-
  scan %>%
  mutate(
    # Calculate steady state fluxes for every row
    ss_fluxes = pmap(., function(cysteine, adomed) {
      setSpecies(key = cysteine_ref, initial.concentration = cysteine)
      setSpecies(key = adomed_ref, initial.concentration = adomed)
      ss <- runSteadyState()
      stopifnot(ss$result == "found")
      ss$reactions$concentration.flux
    }),
    # The second flux is CGS
    CGS = map_dbl(ss_fluxes, 2),
    # The third flux is TS
    TS = map_dbl(ss_fluxes, 3)
  )

plot <-
  ggplot(data = scan, aes(x = adomed, group = cysteine)) +
  geom_point(aes(y = CGS, color = "CGS")) +
  geom_point(aes(y = TS, color = "TS")) +
  geom_line(aes(y = CGS, color = "CGS")) +
  geom_line(aes(y = TS, color = "TS")) +
  labs(
    x = paste0("Abomed (", getQuantityUnit(), ")"),
    y = paste0("Flux (", getQuantityUnit(), " / ", getTimeUnit(), ")"),
    color = "Species"
  )

unloadModel()

plotly::ggplotly(plot)

A scan over random values of two species.

loadSBML("https://www.ebi.ac.uk/biomodels-main/download?mid=BIOMD0000000068")

# 10000 repeats of steady state task with random cysteine and adomed
scan <-
  1:10000 %>%
  # Read parallel evaluation as: first do .prep and then run the formula ~
  mapInParallel(
    .prep = quote({
      library(CoRC)
      loadSBML("https://www.ebi.ac.uk/biomodels-main/download?mid=BIOMD0000000068")
      setSteadyStateSettings(calculateJacobian = FALSE, performStabilityAnalysis = FALSE)
      cysteine_ref <- species("Cysteine")
      adomed_ref <- species("adenosyl")
    }),
    ~ {
      cysteine <- 0.3 * 10 ^ runif(1L, min = 0, max = 3)
      # cysteine <- runif(1L, min = 0.3, max = 300)
      adomed <- runif(1L, min = 0, max = 100)
      setSpecies(
        key = c(cysteine_ref, adomed_ref),
        initial.concentration = c(cysteine, adomed)
      )
      ss <- runSteadyState()
      stopifnot(ss$result == "found")
      list(
        cysteine = cysteine,
        adomed = adomed,
        CGS = ss$reactions$concentration.flux[2],
        TS = ss$reactions$concentration.flux[3]
      )
    }
  )

# Combine all results and reshape the data
plotdata <-
  scan %>%
  bind_rows() %>%
  gather(reaction, flux, CGS, TS)

plot <-
  ggplot(data = plotdata, aes(x = adomed, y = flux, group = reaction, tt_cys = cysteine)) +
  geom_point(aes(color = reaction), alpha = 1 / 10, size = 3 / 4) +
  labs(
    x = paste0("Abomed (", getQuantityUnit(), ")"),
    y = paste0("Flux (", getQuantityUnit(), " / ", getTimeUnit(), ")"),
    color = "Species"
  )

unloadModel()

plotly::ggplotly(plot, tooltip = c("tt_cys", "x", "y"))

Parameter Estimation

loadSBML("http://www.ebi.ac.uk/biomodels-main/download?mid=BIOMD0000000010")

# get timeseries for the record
data_before <-
  runTimeCourse(1000, 1) %>%
  set_tidy_names(TRUE)

# read experimental data
data_experimental <-
  read_tsv("data/MAPKdata.txt") %>%
  rename(Time = time, "Mos-P" = "MAPKKK-P", "Erk2-P" = "MAPK-P") %>%
  set_tidy_names(TRUE)

# define the experiments for copasi
fit_experiments <- defineExperiments(
  data = data_experimental,
  type = c("time", "dependent", "dependent"),
  mapping = c(NA, species(c("Mos-P", "Erk2-P"), "Concentration"))
)

# define the parameters for copasi
fit_parameters <-
  map(
    parameter(c("n).V1", "V2", "V5", "V6", "V9", "V10")),
    ~ {
      val <- getParameters(.x)$value
      defineParameter(parameter(.x, "Value"), lower.bound = val * 0.1, upper.bound = val * 1.9, start.value = val)
    }
  )

result <-
  runParamEst(
    parameters = fit_parameters,
    experiments = fit_experiments,
    method = "LevenbergMarquardt",
    updateModel = TRUE
  )

# get timeseries for the record
data_after <-
  runTimeCourse(1000, 1) %>%
  set_tidy_names(TRUE)

plots <- list(
  Erk2.P =
    ggplot(mapping = aes(x = Time, y = Erk2.P)) +
    geom_point(data = data_experimental, aes(color = "experimental")) +
    geom_line(data = data_before, aes(color = "before")) +
    geom_line(data = data_after, aes(color = "after")) +
    scale_color_manual(values = c(before = "red", after = "green", experimental = "black")) +
    labs(
      x = paste0("Erk2-P (", getQuantityUnit(), ")"),
      y = paste0("Time (", getTimeUnit(), ")"),
      color = "Series"
    ),
  Mos.P =
    ggplot(mapping = aes(x = Time, y = Mos.P)) +
    geom_point(data = data_experimental, aes(color = "experimental")) +
    geom_line(data = data_before, aes(color = "before")) +
    geom_line(data = data_after, aes(color = "after")) +
    scale_color_manual(values = c(before = "red", after = "green", experimental = "black")) +
    labs(
      x = paste0("Mos-P (", getQuantityUnit(), ")"),
      y = paste0("Time (", getTimeUnit(), ")"),
      color = "Series"
    )
)

unloadModel()

result$fitted.values
#> # A tibble: 2 x 5
#>   fitted.value objective.value root.mean.square error.mean
#>          <chr>           <dbl>            <dbl>      <dbl>
#> 1     [Erk2-P]        10.08304         1.058460  0.3290446
#> 2      [Mos-P]        25.45444         1.681747 -0.2829374
#> # ... with 1 more variables: error.mean.std..deviation <dbl>
result$parameters
#> # A tibble: 6 x 8
#>                            parameter lower.bound start.value     value
#>                                <chr>       <dbl>       <dbl>     <dbl>
#> 1             (MAPKKK activation).V1       0.250   2.4643673 2.4643673
#> 2           (MAPKKK inactivation).V2       0.025   0.2465618 0.2465618
#> 3 (dephosphorylation of MAPKK-PP).V5       0.075   0.8817485 0.8817485
#> 4  (dephosphorylation of MAPKK-P).V6       0.075   1.4250000 1.4250000
#> 5  (dephosphorylation of MAPK-PP).V9       0.050   0.7280555 0.7280555
#> 6  (dephosphorylation of MAPK-P).V10       0.050   0.7070213 0.7070213
#> # ... with 4 more variables: upper.bound <dbl>, std..deviation <dbl>,
#> #   coeff..of.variation.... <dbl>, gradient <dbl>

plotly::ggplotly(plots$Erk2.P)
plotly::ggplotly(plots$Mos.P)